The aim of this project is to classify people's emotions based on their facial images. We have more than 20,000 images labeled with their facial expressions and around 2,000 images annotated with facial key points.
We combined two models: one that detects facial key points and one that classifies the facial expression (emotion).
# Mount Google Drive so the dataset stored there is reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Set the working directory to the dataset folder (IPython magic — Colab/Jupyter only).
%cd /content/drive/My Drive/Colab Notebooks/Modern AI Portfolio Builder/Emotion AI/Emotion_AI_Dataset/Emotion AI Dataset
# importing libraries
import pandas as pd
import numpy as np
import os
import PIL
import seaborn as sns
import pickle
from PIL import *
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from IPython.display import display
from tensorflow.python.keras import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, optimizers
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import *
from tensorflow.keras import backend as K
from keras import optimizers
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from google.colab.patches import cv2_imshow
# Load the facial-keypoints dataset: one row per image, 30 keypoint
# coordinates (15 x/y pairs) plus the image pixels as a string.
keyfacial_df=pd.read_csv('/content/drive/My Drive/Colab Notebooks/Modern AI Portfolio Builder/Emotion AI/Emotion_AI_Dataset/Emotion AI Dataset/data.csv')
keyfacial_df
# Obtain relevant information about the dataframe (column dtypes, non-null counts).
keyfacial_df.info()
The dataset contains one feature of type object: the 'Image' column, which holds the pixels of the face images.
#check if null values exist in the dataframe
keyfacial_df.isnull().sum() # There are no null values in the dataset
keyfacial_df['Image'].shape # we have 2140 images with their facial keypoints annotations
Since the values for the images are given as a space-separated string, we split the string on spaces, convert the result into a NumPy array, and reshape the resulting 1D array into a 2D array of shape (96, 96).
# Parse the space-separated pixel string of every row into a 96x96 integer
# array.  np.fromstring is deprecated for text parsing, so the string is
# split explicitly; str.split() with no argument also tolerates repeated or
# leading/trailing whitespace.
keyfacial_df['Image'] = keyfacial_df['Image'].apply(
    lambda x: np.array(x.split(), dtype=int).reshape(96, 96))
#Obtain the shape of the Image
keyfacial_df['Image'][0].shape # The first image is 96X96
# Summary statistics of the keypoint coordinates.
keyfacial_df.describe()
Let's first plot a random image from the dataset along with its facial keypoints. Image data is obtained from df['Image'] and plotted using plt.imshow. The x and y coordinates are obtained from the corresponding columns: since x-coordinates are in even columns (0, 2, 4, ...) and y-coordinates are in odd columns, we access their values using the .loc command, which gets the coordinate values based on the column it refers to.
# Plot one randomly chosen image together with its 15 facial keypoints.
i=np.random.randint(1,len(keyfacial_df))
plt.imshow(keyfacial_df['Image'][i], cmap='gray')
# Columns alternate x, y: even columns hold x-coordinates, odd columns hold y.
for j in range(1,31,2):
    plt.plot(keyfacial_df.loc[i][j-1], keyfacial_df.loc[i][j], 'rx')
Let's look at the first 16 images along with their facial keypoints in a grid format.
# Show the first 16 images with their keypoints in a 4x4 grid.
fig= plt.figure(figsize=(20,20))
for i in range(16):
    ax=fig.add_subplot(4,4,i+1)
    image=plt.imshow(keyfacial_df['Image'][i], cmap='gray')
    for j in range(1,31,2):
        plt.plot(keyfacial_df.loc[i][j-1],keyfacial_df.loc[i][j], 'rx')
#This is for a random visualization: 64 randomly sampled images in an 8x8 grid
import random
fig= plt.figure(figsize=(20,20))
for i in range(64):
    k=random.randint(1,len(keyfacial_df)) #random image selection
    ax=fig.add_subplot(8,8,i+1)
    image=plt.imshow(keyfacial_df['Image'][k], cmap='gray')
    for j in range(1,31,2):
        plt.plot(keyfacial_df.loc[k][j-1],keyfacial_df.loc[k][j], 'rx')
# Data augmentation: work on a copy of the dataframe so the original stays intact.
import copy
keyfacial_df_copy=copy.copy(keyfacial_df)
# All keypoint columns (every column except the final 'Image' column).
columns=keyfacial_df_copy.columns[:-1]
columns
# Horizontal flip: mirror each image along the y axis (axis=1 flips the pixel columns).
keyfacial_df_copy['Image']=keyfacial_df_copy['Image'].apply(lambda x: np.flip(x,axis=1))
Since we are flipping along the y axis, the y coordinates stay the same; only the x coordinates change. We therefore subtract each original x coordinate from the width of the image (96).
# Mirror the x-coordinates to match the flipped images: even-indexed columns
# hold x values, and a mirrored x becomes (image width) - x = 96 - x.
for i in range(len(columns)):
    if i%2==0:
        keyfacial_df_copy[columns[i]]=keyfacial_df_copy[columns[i]].apply(lambda x: 96. - float(x))
# IPython help lookup for Figure.subplots (notebook-only syntax, no effect on the data).
fig.subplots?
#show the original image (left) next to its horizontally flipped copy (right)
fig=plt.figure()
axes=fig.subplots(nrows=1,ncols=2)
axes[0].imshow(keyfacial_df['Image'][0], cmap='gray')
for j in range(1,31,2):
    axes[0].plot(keyfacial_df.loc[0][j-1], keyfacial_df.loc[0][j], 'rx')
axes[0].set_title("Original Image")
axes[1].imshow(keyfacial_df_copy['Image'][0], cmap='gray')
for j in range(1,31,2):
    axes[1].plot(keyfacial_df_copy.loc[0][j-1], keyfacial_df_copy.loc[0][j], 'rx')
axes[1].set_title("Flipped image")
#concatenate the original dataframe with the flipped dataframe (doubles the rows)
augmented_df=np.concatenate((keyfacial_df,keyfacial_df_copy))
augmented_df.shape
# Second augmentation: random brightness increase.  Each image is multiplied by
# a random factor in [1.5, 2] and clipped back to the valid [0, 255] range.
# Keypoint coordinates are unaffected by a brightness change.
import random
keyfacial_df_copy=copy.copy(keyfacial_df)
keyfacial_df_copy['Image']=keyfacial_df_copy['Image'].apply(lambda x: np.clip(random.uniform(1.5, 2)*x,0.0,255.0))
augmented_df=np.concatenate((augmented_df,keyfacial_df_copy))
augmented_df.shape
#showing image with increased brightness
plt.imshow(keyfacial_df_copy['Image'][0], cmap='gray')
for j in range(1,31,2):
    plt.plot(keyfacial_df_copy.loc[0][j-1],keyfacial_df_copy.loc[0][j], 'rx')
# The image is stored in column 30 (columns 0-29 hold the 30 keypoint coordinates).
img = augmented_df[:, 30]
# Normalize pixel values to [0, 1].
img = img / 255.
# Stack the per-row (96, 96) images into a single (N, 96, 96, 1) float32 batch.
# np.stack replaces the original element-by-element copy loop into a
# pre-allocated array — same result, less code.
X = np.stack(img).reshape(-1, 96, 96, 1).astype(np.float32)
X.shape
# The 30 x/y coordinates are the regression targets.
y = augmented_df[:, :30].astype(np.float32)
y.shape
# Hold out 10% of the data for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
X_train.shape
def res_block(X, filter, stage):
    """Residual stage: one convolutional block followed by two identity blocks.

    `filter` is a triple (f1, f2, f3) of channel counts for the 1x1 / 3x3 / 1x1
    convolutions; `stage` is only used to build unique layer names, so saved
    weights can be matched back to layers by name.
    """
    f1, f2, f3 = filter
    tag = str(stage)

    def conv_bn(tensor, n_filters, kernel, suffix, padding='valid', pool=False):
        # Conv2D (optionally followed by a 2x2 max-pool) then batch norm,
        # with deterministic Glorot initialization and stage-unique names.
        tensor = Conv2D(n_filters, kernel, strides=(1, 1), padding=padding,
                        name='res_' + tag + suffix,
                        kernel_initializer=glorot_uniform(seed=0))(tensor)
        if pool:
            tensor = MaxPool2D((2, 2))(tensor)
        return BatchNormalization(axis=3, name='bn_' + tag + suffix)(tensor)

    # --- Convolutional block (downsamples via the 2x2 max-pools) ---
    shortcut = X
    X = Activation('relu')(conv_bn(X, f1, (1, 1), '_conv_a', pool=True))
    X = Activation('relu')(conv_bn(X, f2, (3, 3), '_conv_b', padding='same'))
    X = conv_bn(X, f3, (1, 1), '_conv_c')
    # Projection shortcut so the skip path matches the main path's shape.
    shortcut = conv_bn(shortcut, f3, (1, 1), '_conv_copy', pool=True)
    X = Activation('relu')(Add()([X, shortcut]))

    # --- Two identity blocks (no downsampling, plain skip connection) ---
    for block in ('1', '2'):
        shortcut = X
        X = Activation('relu')(conv_bn(X, f1, (1, 1), '_identity_' + block + '_a'))
        X = Activation('relu')(conv_bn(X, f2, (3, 3), '_identity_' + block + '_b', padding='same'))
        X = conv_bn(X, f3, (1, 1), '_identity_' + block + '_c')
        X = Activation('relu')(Add()([X, shortcut]))

    return X
# ---- Model 1: facial key-point detection (ResNet-style regressor) ----
input_shape = (96, 96, 1)
# Input tensor shape
X_input = Input(input_shape)
# Zero-padding
X = ZeroPadding2D((3,3))(X_input)
# 1 - stage: initial 7x7 conv + batch norm + relu + max-pool
X = Conv2D(64, (7,7), strides= (2,2), name = 'conv1', kernel_initializer= glorot_uniform(seed = 0))(X)
X = BatchNormalization(axis =3, name = 'bn_conv1')(X)
X = Activation('relu')(X)
X = MaxPooling2D((3,3), strides= (2,2))(X)
# 2 - stage
X = res_block(X, filter= [64,64,256], stage= 2)
# 3 - stage
X = res_block(X, filter= [128,128,512], stage= 3)
# Average Pooling
X = AveragePooling2D((2,2), name = 'Averagea_Pooling')(X)
# Final layers: two fully-connected layers with dropout, then 30 outputs
# (the 15 x/y keypoint pairs).
X = Flatten()(X)
X = Dense(4096, activation = 'relu')(X)
X = Dropout(0.2)(X)
X = Dense(2048, activation = 'relu')(X)
X = Dropout(0.1)(X)
# NOTE(review): 'relu' on the output clamps predictions to >= 0.  Coordinates
# in this dataset are non-negative, but a linear output is more conventional
# for regression — confirm before reusing elsewhere.
X = Dense(30, activation = 'relu')(X)
model_1_facialKeyPoints = Model( inputs= X_input, outputs = X)
model_1_facialKeyPoints.summary()
adam = tf.keras.optimizers.Adam(learning_rate = 0.0001, beta_1 = 0.9, beta_2 = 0.999, amsgrad = False)
# NOTE(review): 'accuracy' compares exact float values and is not a meaningful
# metric for an MSE regression — 'mae' would be more informative.
model_1_facialKeyPoints.compile(loss = "mean_squared_error", optimizer = adam , metrics = ['accuracy'])
# Check this out for more information on Adam optimizer: https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
# Defining a checkpointer: save the best model with least validation loss
checkpointer = ModelCheckpoint(filepath = "FacialKeyPoints_weights.hdf5", verbose = 1, save_best_only = True)
Now we train the model for just 2 epochs to show that it works. Our already-trained model is loaded afterwards.
#Training the model with just 2 epochs (demo run; the fully trained model is loaded below)
history = model_1_facialKeyPoints.fit(X_train, y_train, batch_size = 32, epochs = 2, validation_split = 0.05, callbacks=[checkpointer])
# save the model architecture to json file for future use
model_json = model_1_facialKeyPoints.to_json()
with open("FacialKeyPoints-model.json","w") as json_file:
    json_file.write(model_json)
We are going to load our already-trained model, which uses the architecture presented above.
# Loading the facial key point model already trained: architecture from JSON,
# weights from HDF5.
with open('detection.json', 'r') as json_file:
    json_savedModel= json_file.read()
# load the model architecture
model_1_facialKeyPoints = tf.keras.models.model_from_json(json_savedModel)
model_1_facialKeyPoints.load_weights('weights_keypoint.hdf5')
# Recompile with the same optimizer/loss used during training.
adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model_1_facialKeyPoints.compile(loss="mean_squared_error", optimizer= adam , metrics = ['accuracy'])
Evaluating the model on our test set, we get 83% accuracy — a great result!
#Evaluate the model on the held-out test set
result=model_1_facialKeyPoints.evaluate(X_test,y_test)
# result[0] is the MSE loss, result[1] the compiled metric.
print("Accuracy: {}".format(result[1]))
#Read the csv file for the facial expression data (emotion label + pixel string)
facialexpression_df=pd.read_csv('icml_face_data.csv')
facialexpression_df.head()
# NOTE: the column name really is ' pixels' (with a leading space) in this CSV.
facialexpression_df[' pixels'][0] # the first image pixels, as a space-separated string
def string2array(x):
    """Parse a space-separated pixel string into a (48, 48, 1) float32 array.

    Uses str.split() with no argument so repeated, leading, or trailing
    whitespace is tolerated (x.split(' ') would yield empty tokens there and
    crash the numeric conversion).
    """
    return np.array(x.split(), dtype='float32').reshape(48, 48, 1)
#Resizes images from (48, 48) to (96, 96)
def resize(x):
    """Upscale a 48x48 image to 96x96 using bicubic interpolation."""
    return cv2.resize(x.reshape(48, 48), dsize=(96, 96), interpolation=cv2.INTER_CUBIC)
# Parse every pixel string into an array, then upscale each image to 96x96
# (the input size both models expect).
facialexpression_df[' pixels']=facialexpression_df[' pixels'].apply(lambda x: string2array(x))
facialexpression_df[' pixels']=facialexpression_df[' pixels'].apply(lambda x: resize(x))
facialexpression_df.head()
#Check the shape of the data
facialexpression_df.shape
#checking the presence of null values in the data frame
facialexpression_df.isnull().sum()
# Human-readable names for the five emotion labels.
label_to_text={0:'anger',1:'disgust',2:'sad',3:'happiness',4:'surprise'}
emotion = [0, 1, 2, 3, 4]
# Show one sample image per emotion class.
for i in emotion:
    data=facialexpression_df[facialexpression_df['emotion'] == i][:1] # first image with label i
    img=data[' pixels'].item()
    img=img.reshape(96, 96)
    plt.figure()
    plt.title(label_to_text[i])
    plt.imshow(img,cmap='gray')
Let's check if our data is balanced.
# Class distribution: count how many images carry each emotion label.
facialexpression_df.emotion.value_counts().index
facialexpression_df.emotion.value_counts()
plt.figure(figsize=(10,10))
sns.barplot(x=facialexpression_df.emotion.value_counts().index,y=facialexpression_df.emotion.value_counts())
The disgust-labeled face images are few compared to the other labels; we could consider upsampling images with this label.
Splitting the dataframe into features and labels.
# Use tf.keras's to_categorical for consistency with the tf.keras API used
# throughout this notebook (mixing the standalone `keras` package with
# `tf.keras` can cause subtle incompatibilities).
from tensorflow.keras.utils import to_categorical
X=facialexpression_df[' pixels']
# One-hot encode the 5 emotion labels.
y=to_categorical(facialexpression_df['emotion'])
X
y
# Stack the per-row images into one batch, adding the channel dimension.
# reshape(-1, ...) infers the sample count instead of hard-coding the
# dataset size (the original hard-coded 24568, which breaks on any other
# dataset split or size).
X=np.stack(X, axis=0)
X=X.reshape(-1,96,96,1)
print(X.shape,y.shape)
from sklearn.model_selection import train_test_split
# 90% train; the remaining 10% is split evenly into validation and test sets.
X_train, X_Test, y_train,y_Test=train_test_split(X,y,test_size=0.1, shuffle=True)
X_val, X_Test,y_val, y_Test=train_test_split(X_Test,y_Test,test_size=0.5, shuffle=True)
print(X_val.shape,y_val.shape)
#Image pre_processing: scale pixel values to [0, 1]
X_train=X_train/255
X_val=X_val/255
X_Test=X_Test/255
print(X_Test.shape,y_Test.shape)
print(X_val.shape,y_val.shape)
print(X_train.shape,y_train.shape)
#Another way of performing data augmentation: random on-the-fly transforms
train_datagen=ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    # NOTE(review): vertical_flip produces upside-down faces, which never occur
    # in this dataset — consider disabling it.
    vertical_flip=True,
    brightness_range=[1.1,1.5],
    fill_mode="nearest"
)
# ---- Model 2: emotion classification (ResNet-style, 5-way softmax) ----
input_shape = (96, 96, 1)
# Input tensor shape
X_input = Input(input_shape)
# Zero-padding
X = ZeroPadding2D((3, 3))(X_input)
# 1 - stage: initial 7x7 conv + batch norm + relu + max-pool
X = Conv2D(64, (7, 7), strides= (2, 2), name = 'conv1', kernel_initializer= glorot_uniform(seed = 0))(X)
X = BatchNormalization(axis =3, name = 'bn_conv1')(X)
X = Activation('relu')(X)
X = MaxPooling2D((3, 3), strides= (2, 2))(X)
# 2 - stage
X = res_block(X, filter= [64, 64, 256], stage= 2)
# 3 - stage
X = res_block(X, filter= [128, 128, 512], stage= 3)
# 4 - stage (disabled)
# X = res_block(X, filter= [256, 256, 1024], stage= 4)
# Average Pooling
X = AveragePooling2D((4, 4), name = 'Averagea_Pooling')(X)
# Final layer: softmax over the 5 emotion classes
X = Flatten()(X)
X = Dense(5, activation = 'softmax', name = 'Dense_final', kernel_initializer= glorot_uniform(seed=0))(X)
model_2_emotion = Model( inputs= X_input, outputs = X, name = 'Resnet18')
model_2_emotion.summary()
model_2_emotion.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=["accuracy"])
# Stop when validation loss no longer improves, and keep the best checkpoint.
earlystopping=EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=60)
checkpointer=ModelCheckpoint(filepath="FacialExpression_weights.hdf5", verbose=1, save_best_only=True)
We train the model for just 5 epochs; the already-trained model is loaded afterwards.
# Train briefly (5 epochs) just to demonstrate the pipeline; the fully trained
# model is loaded below.  steps_per_epoch is derived from the same batch size
# as the generator: the original divided by 64 while batching by 32, so each
# "epoch" only covered half of the training data.
history=model_2_emotion.fit(train_datagen.flow(X_train,y_train,batch_size=32),
                            validation_data=(X_val,y_val),
                            steps_per_epoch=len(X_train) // 32,
                            epochs=5,callbacks=[checkpointer,earlystopping])
#saving the model architecture to json file for future use
model_json=model_2_emotion.to_json()
with open("FacialExpression-model.json","w") as json_file:
    json_file.write(model_json)
#Loading the emotion detection model already trained for 200 epochs
with open('emotion.json','r') as json_file:
    json_savedModel=json_file.read()
#load the model architecture, then its trained weights
model_2_emotion=tf.keras.models.model_from_json(json_savedModel)
model_2_emotion.load_weights('weights_emotions.hdf5')
model_2_emotion.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=["accuracy"])
#Evaluating the model on the held-out test set
score=model_2_emotion.evaluate(X_Test,y_Test)
print('Test_Accuracy:{}'.format(score[1]))
# Predict once and reuse the class probabilities — the original called
# model_2_emotion.predict(X_Test) a second time just to take the argmax.
predicted_classes=model_2_emotion.predict(X_Test)
predicted_classes # probabilities of each class for each sample
# Collapse the probabilities to the most likely class index per sample.
predicted_classes=np.argmax(predicted_classes,axis=-1)
predicted_classes
# Ground-truth class indices recovered from the one-hot labels.
y_true=np.argmax(y_Test,axis=-1)
y_true
from sklearn.metrics import confusion_matrix
# Confusion matrix of true vs predicted emotion classes.
cm=confusion_matrix(y_true,predicted_classes)
plt.figure(figsize=(10,10))
sns.heatmap(cm, annot=True, cbar=False)
#Reporting the classification (per-class precision/recall/F1)
from sklearn.metrics import classification_report
print(classification_report(y_true, predicted_classes))
Let's visualize the predicted and true expressions alongside their associated images.
# Show a 5x5 grid of test images with predicted vs true emotion labels.
L = 5
W = 5
fig, axes = plt.subplots(L, W, figsize = (24, 24))
axes = axes.ravel()
for i in np.arange(0, L*W):
    axes[i].imshow(X_Test[i].reshape(96,96), cmap = 'gray')
    axes[i].set_title('Prediction = {}\n True = {}'.format(label_to_text[predicted_classes[i]], label_to_text[y_true[i]]))
    axes[i].axis('off')
plt.subplots_adjust(wspace = 1)
def predict(X_Test):
    """Run both models on a batch of images and combine their outputs.

    Returns a dataframe with one row per image: the 30 predicted key-point
    coordinates (named after `columns`) plus an 'emotion' class index.
    """
    # Key-point coordinates from the regression model.
    keypoints = model_1_facialKeyPoints.predict(X_Test)
    # Most likely emotion class from the classifier's softmax output.
    emotions = np.argmax(model_2_emotion.predict(X_Test), axis=-1)
    # Assemble both predictions into one dataframe; the emotion column is
    # added as an (N, 1) array, matching the original behaviour.
    combined = pd.DataFrame(keypoints, columns=columns)
    combined['emotion'] = np.expand_dims(emotions, axis=1)
    return combined
# Run the combined prediction on the key-point test set and inspect the result.
df_predict=predict(X_test)
df_predict.head()
df_predict.shape
# Show 25 test images with predicted key points and predicted emotion.
fig= plt.figure(figsize=(20,20))
for i in range(25):
    ax=fig.add_subplot(5,5,i+1)
    plt.imshow(X_test[i].squeeze(), cmap='gray')
    plt.title('predicted class'+ ':'+' '+ label_to_text[df_predict['emotion'][i]])
    for j in range(1,31,2):
        plt.plot(df_predict.loc[i][j-1], df_predict.loc[i][j], 'rx')
import json
import tensorflow.keras.backend as K
def deploy(directory, model):
    """Save `model` under <directory>/1 in SavedModel format and install
    TensorFlow Model Server (Colab-only: uses `!` shell escapes)."""
    MODEL_DIR=directory
    version=1
    #let's join the tmp model directory with our chosen version number
    #The expected result will be = '\tmp\version_number'
    export_path=os.path.join(MODEL_DIR, str(version))
    print('export_path={}\n'.format(export_path))
    # Let's save the model using saved_model.save
    # If the directory already exists, we remove it first using '!rm'
    # rm removes each file specified in the command line.
    if os.path.isdir(export_path):
        print('\nAlready saved a model, cleaning up\n')
        !rm -r {export_path}
    tf.saved_model.save(model, export_path)
    # Expose the base directory to the shell commands that start the server.
    os.environ["MODEL_DIR"]=MODEL_DIR
    # Let's add tensorflow-model-server package to our list of packages
    !echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \
    curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
    !apt update
    # Let's install tensorflow model server
    !apt-get install tensorflow-model-server
Deployment of the model
# Deploy the key-point model and start a TF Serving instance on port 4500.
deploy('/model', model_1_facialKeyPoints)
%%bash --bg
nohup tensorflow_model_server \
  --rest_api_port=4500 \
  --model_name=keypoint_model \
  --model_base_path="${MODEL_DIR}" >server.log 2>&1
# Check the server log for startup errors.
!tail server.log
# Deploy the emotion model on a second server instance, port 4000.
deploy('/model1', model_2_emotion)
%%bash --bg
nohup tensorflow_model_server \
  --rest_api_port=4000 \
  --model_name=emotion_model \
  --model_base_path="${MODEL_DIR}" >server.log 2>&1
!tail server.log
Let's then make an inference
import json
# Build a TF Serving REST request: the first three test images go under
# "instances", as the default serving signature expects.
data=json.dumps({"signature_name": "serving_default","instances":X_test[0:3].tolist()})
print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))
import requests
# Function to make predictions from the two deployed models
def response(data):
    """POST `data` to both TF Serving REST endpoints and merge the replies
    into one dataframe (key-point columns plus an 'emotion' column)."""
    headers = {"content-type": "application/json"}
    # Key-point predictions from the server listening on port 4500.
    keypoint_reply = requests.post('http://localhost:4500/v1/models/keypoint_model/versions/1:predict', data=data, headers=headers, verify = False)
    keypoints = json.loads(keypoint_reply.text)['predictions']
    # Emotion probabilities from the server on port 4000, reduced to class ids.
    emotion_reply = requests.post('http://localhost:4000/v1/models/emotion_model/versions/1:predict', data=data, headers=headers, verify = False)
    emotions = np.argmax(json.loads(emotion_reply.text)['predictions'], axis = 1)
    # Combine both into a single dataframe, one row per input image; the
    # emotion column is added as an (N, 1) array, matching the original.
    merged = pd.DataFrame(keypoints, columns = columns)
    merged['emotion'] = np.expand_dims(emotions, axis = 1)
    return merged
# making prediction via the deployed endpoints
df_predict = response(data)
df_predict
# Plotting the test images and their predicted keypoints and emotions
fig, axes = plt.subplots(3, 1, figsize = (24, 24))
axes = axes.ravel()
for i in range(3):
    axes[i].imshow(X_test[i].squeeze(), cmap='gray')
    axes[i].set_title('Prediction = {}'.format(label_to_text[df_predict['emotion'][i]]))
    axes[i].axis('off')
    for j in range(1,31,2):
        axes[i].plot(df_predict.loc[i][j-1], df_predict.loc[i][j], 'rx')
We have successfully deployed our models and made inferences against them running on TensorFlow Serving.
This project is the beginning of a series of computer vision projects coming soon — stay tuned if you want to learn more.